//package com.zlm.spider.test;
//
//import us.codecraft.webmagic.Page;
//import us.codecraft.webmagic.Site;
//import us.codecraft.webmagic.Spider;
//import us.codecraft.webmagic.processor.PageProcessor;
//import us.codecraft.webmagic.scheduler.BloomFilterDuplicateRemover;
//import us.codecraft.webmagic.scheduler.DuplicateRemovedScheduler;
//import us.codecraft.webmagic.scheduler.QueueScheduler;
//import us.codecraft.webmagic.selector.Html;
//
///**
// * Author: Harbour
// * Date: 2021-04-13 13:03
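// * Desc: Scratch WebMagic crawler test (currently commented out) that fetches a single 51job posting page and prints fields extracted from it.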
// */
//public class WebMagicTest1 implements PageProcessor {
//
//    private Site site = Site.me()
//            .setUserAgent("Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0")
//            .setCharset("GBK")  // set the page charset
//            .setTimeOut(1000 * 10) // set the download timeout
//            .setRetrySleepTime(3000) // set the sleep time between retries (not a timeout)
//            .setUseGzip(true)
//            .setRetryTimes(3); // set the number of retries
//
//
//
//    @Override
//    public void process(Page page) {
//
//        Html html = page.getHtml();
//
//        String s = html.xpath("/html/body/div[3]/div[2]/div[3]/div[1]/div").toString().split("<div class=\"mt10\"> ")[0];
//        String s1 = s.replace("<div class=\"bmsg job_msg inbox\">", "")
//                .replace("<p>", "")
//                .replace("</p>","")
//                .replace("<br>","")
//                .replace("<br>","");
//
////        s
////        s.replace("<br>","");
////        s.replace("<br>","");
//
//        String[] s2 = html.xpath("/html/body/div[3]/div[2]/div[2]/div/div[1]/p[2]/text()").get().replace("&nbsp;", "").split("<span>\\|</span>");
//
//
//        for (String s3 : s2) {
//            System.out.println(s3);
//        }
////        System.out.println(type);
//
//    }
//
//    public void process () {
//        String url = "https://jobs.51job.com/shanghai-pdxq/130954011.html?s=sou_sou_soulb&t=0";
//        DuplicateRemovedScheduler scheduler = new QueueScheduler()
//                .setDuplicateRemover(new BloomFilterDuplicateRemover(1000 * 1000));
//        Spider.create(new WebMagicTest1())
//                .addUrl(url)
//                .setScheduler(scheduler)
//                .thread(10)
//                .run();
//    }
//
//    @Override
//    public Site getSite() {
//        return site;
//    }
//
//    public static void main(String[] args) {
//        new WebMagicTest1().process();
//    }
//}
